Uploading data
total <- read_csv("data/LTINT_GDP.csv")
df_gdp <- read_csv("data/GDP_yearly.csv")
df_ltint <- read_csv("data/LTINT_yearly.csv")
Making data wide I removed the Flag Codes to make the wide frames more manageable. Flags were not necessary for the analysis.
df_ltint_wide <- df_ltint %>%
select(-`Flag Codes`) %>%
pivot_wider(names_from = INDICATOR, values_from = Value)
Using MEASURE variable as the pivot to make it wide (names_from = MEASURE) because there were two different measures of GDP (million USD and per capita) Then dropping the INDICATOR variable that had GDP as values
df_gdp_wide <- df_gdp %>%
select(-`Flag Codes`) %>%
pivot_wider(names_from = MEASURE, values_from = Value, names_prefix = "GDP_") %>%
select(-INDICATOR)
Merging GDP and long term interest rates in one dataframe
df_merged <- df_gdp_wide %>% left_join(df_ltint_wide %>% select(LOCATION, TIME, LTINT), by = c("LOCATION","TIME"))
df_debt_ratio <- read_csv("data/debt_gdp_ratio_oecd.csv")
## Parsed with column specification:
## cols(
## LOCATION = col_character(),
## INDICATOR = col_character(),
## SUBJECT = col_character(),
## MEASURE = col_character(),
## FREQUENCY = col_character(),
## TIME = col_double(),
## Value = col_double(),
## `Flag Codes` = col_logical()
## )
df_debt_ratio <- df_debt_ratio %>% rename(debt_gdp = Value)
df_debt_wide <- df_debt_ratio %>%
select(-`Flag Codes`) %>%
pivot_wider(names_from = MEASURE, values_from = debt_gdp) %>%
select(-INDICATOR)
df_debt_wide
## # A tibble: 799 x 5
## LOCATION SUBJECT FREQUENCY TIME PC_GDP
## <chr> <chr> <chr> <dbl> <dbl>
## 1 AUS TOT A 1995 57.6
## 2 AUS TOT A 1996 55.4
## 3 AUS TOT A 1997 54.6
## 4 AUS TOT A 1998 52.4
## 5 AUS TOT A 1999 44.8
## 6 AUS TOT A 2000 41.1
## 7 AUS TOT A 2001 40.4
## 8 AUS TOT A 2002 38.7
## 9 AUS TOT A 2003 35.7
## 10 AUS TOT A 2004 32.3
## # … with 789 more rows
Merging dataframes (long term interest rates, debt-to-gdp ratio and gdp)
df_merged <- df_gdp_wide %>%
left_join(df_ltint_wide %>% select(LOCATION, TIME, LTINT), by = c("LOCATION","TIME")) %>%
left_join(df_debt_wide %>% select(LOCATION, TIME, PC_GDP), by = c("LOCATION","TIME")) %>%
rename(debt_gdp = PC_GDP)
df_merged
## # A tibble: 2,485 x 8
## LOCATION SUBJECT FREQUENCY TIME GDP_MLN_USD GDP_USD_CAP LTINT debt_gdp
## <chr> <chr> <chr> <dbl> <dbl> <dbl> <dbl> <dbl>
## 1 AUS TOT A 1960 25035. 2409. NA NA
## 2 AUS TOT A 1961 25326. 2380. NA NA
## 3 AUS TOT A 1962 27913. 2574. NA NA
## 4 AUS TOT A 1963 30386. 2748. NA NA
## 5 AUS TOT A 1964 32694. 2898. NA NA
## 6 AUS TOT A 1965 34490. 2998. NA NA
## 7 AUS TOT A 1966 38285. 3271. NA NA
## 8 AUS TOT A 1967 41004. 3442. NA NA
## 9 AUS TOT A 1968 46488. 3828. NA NA
## 10 AUS TOT A 1969 52132. 4202. NA NA
## # … with 2,475 more rows
Creating dataset from 2006 (date where long term interest rates data starts)
df_merged_2006_2020 <- df_merged %>% filter(TIME>=2006)
df_merged_2006_2020 %>% write_csv("gdp_ltint_debt2006_2020.csv")
Comparing what countries are in each dataframe
countries_gdp <- df_gdp %>% select(LOCATION) %>% unique()
countries_debt <- df_debt_ratio %>% select(LOCATION) %>% unique()
countries_ltint <- df_ltint %>% select(LOCATION) %>% unique()
setdiff(countries_gdp, countries_debt)
## # A tibble: 29 x 1
## LOCATION
## <chr>
## 1 KOR
## 2 NZL
## 3 CHN
## 4 IND
## 5 IDN
## 6 RUS
## 7 ZAF
## 8 DEW
## 9 EU28
## 10 OECD
## # … with 19 more rows
setdiff(countries_debt, countries_ltint)
## # A tibble: 2 x 1
## LOCATION
## <chr>
## 1 TUR
## 2 EST
setdiff(countries_gdp, countries_ltint)
## # A tibble: 24 x 1
## LOCATION
## <chr>
## 1 TUR
## 2 CHN
## 3 EST
## 4 IDN
## 5 DEW
## 6 EU28
## 7 OECD
## 8 OECDE
## 9 BRA
## 10 SAU
## # … with 14 more rows
df_merged_2006_2020 %>% summary(debt_gdp)
## LOCATION SUBJECT FREQUENCY TIME
## Length:900 Length:900 Length:900 Min. :2006
## Class :character Class :character Class :character 1st Qu.:2009
## Mode :character Mode :character Mode :character Median :2013
## Mean :2013
## 3rd Qu.:2016
## Max. :2020
##
## GDP_MLN_USD GDP_USD_CAP LTINT debt_gdp
## Min. : 9439 Min. : 1551 Min. :-0.5238 Min. : 7.196
## 1st Qu.: 180282 1st Qu.: 20406 1st Qu.: 1.4919 1st Qu.: 45.547
## Median : 443502 Median : 33335 Median : 3.3596 Median : 67.829
## Mean : 3254583 Mean : 33862 Mean : 3.7163 Mean : 76.844
## 3rd Qu.: 2174240 3rd Qu.: 43884 3rd Qu.: 5.2444 3rd Qu.:103.304
## Max. :63079189 Max. :120670 Max. :22.4975 Max. :238.726
## NA's :14 NA's :332 NA's :427
median_debt <- median(df_merged_2006_2020$debt_gdp, na.rm=T)
Visualising the relationship between GDP and long term interest rates in 2019 (debt-to-gdp ratio data for 2020 is very patchy) Colour of the dot corresponds to whether the country is above or below the median debt-to-gdp ratio in 2019
median_debt_2019 <- df_merged_2006_2020 %>%
filter(TIME==2019, LOCATION!="EU27_2020", LOCATION!="OECDE", LOCATION!="EA19") %>%
pull(debt_gdp) %>% median(na.rm=T)
Scatterplot
p1 <- df_merged_2006_2020 %>%
filter(TIME==2019, LOCATION!="EU27_2020", LOCATION!="OECDE", LOCATION!="EA19") %>%
mutate(above_debt_median=if_else(debt_gdp>=median_debt_2019,TRUE,FALSE)) %>%
ggplot(aes(LTINT, GDP_MLN_USD, colour=above_debt_median, group=LOCATION)) +
geom_point()
ggplotly(p1)
Visualising the relationship between GDP and long term interest rates in 2013 Colour of the dot corresponds to whether the country is above or below the median debt-to-gdp ratio in 2013
median_debt_2013 <- df_merged_2006_2020 %>%
filter(TIME==2013, LOCATION!="EU27_2020", LOCATION!="OECDE", LOCATION!="EA19") %>%
pull(debt_gdp) %>% median(na.rm=T)
Scatterplot
p2 <- df_merged_2006_2020 %>%
filter(TIME==2013, LOCATION!="EU27_2020", LOCATION!="OECDE", LOCATION!="EA19") %>%
mutate(above_debt_median=if_else(debt_gdp>=median_debt_2013,TRUE,FALSE)) %>%
ggplot(aes(LTINT, GDP_MLN_USD, colour=above_debt_median, group=LOCATION)) +
geom_point()
ggplotly(p2)
Creating a eurozone countries variable
eurozone <- c("AUT","BEL","CYP","EST","FIN","FRA","DEU","GRC","IRL","ITA","LVA","LTU","LUX","MLT","NLD","PRT","SVK","SVN","ESP")
median_debt_euro_2019 <- df_merged_2006_2020 %>%
filter(TIME==2013, LOCATION %in% eurozone) %>%
pull(debt_gdp) %>% median(na.rm=T)
Visualising long term interest rates and GDP relationship, colour-coded by above or below median of eurozone debt-to-gdp ratio
p3 <- df_merged_2006_2020 %>%
filter(TIME==2019, LOCATION %in% eurozone) %>%
mutate(above_debt_median=if_else(debt_gdp>=median_debt_euro_2019,TRUE,FALSE)) %>%
ggplot(aes(LTINT, GDP_MLN_USD, colour=above_debt_median, group=LOCATION)) +
geom_point()
ggplotly(p3)
Visualising long term interest rates and debt-to-gdp ratio relationship in eurozone
p4 <- df_merged_2006_2020 %>%
filter(TIME==2019, LOCATION %in% eurozone) %>%
ggplot(aes(LTINT, debt_gdp, colour=LOCATION)) +
geom_point()
ggplotly(p4)